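Exploratory data analysis (EDA) of the meteor strike data set: a first look at the table, the distributions of year and mass, the fell/found counts, and the geographic locations.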

Load the packages used in this analysis.

library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ----------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.0.6     v dplyr   1.0.4
v tidyr   1.1.2     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1
-- Conflicts -------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(janitor)

Attache Paket: ‘janitor’

The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test
library(ggthemes)
library(plotly)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attache Paket: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
library(hexbin)
library(viridis)
Lade nötiges Paket: viridisLite
library(corrplot)
corrplot 0.84 loaded

Read the CSV file into a tibble named ‘data’ and convert the column names to snake_case.

# Read the raw meteor strike table and normalise its column names with
# janitor::clean_names(). Column types are guessed by readr.
data <- clean_names(
  read_csv('../data/MeteorStrikesDataSet1.csv')
)

-- Column specification ----------------------------------------------------------------------------------------------------------------------------------
cols(
  place = col_character(),
  year = col_double(),
  mass_g = col_double(),
  longitude = col_double(),
  latitude = col_double(),
  fell_found = col_character()
)

First look at the data with `head()`, `tail()` and `summary()`.

# First 50 rows, last rows (default count), and per-column summary statistics.
data %>% head(50)
data %>% tail()
data %>% summary()
    place                year          mass_g           longitude          latitude       fell_found       
 Length:34065       Min.   :-600   Min.   :       0   Min.   :-165.43   Min.   :-87.37   Length:34065      
 Class :character   1st Qu.:1981   1st Qu.:       6   1st Qu.:  26.00   1st Qu.:-76.90   Class :character  
 Mode  :character   Median :1990   Median :      25   Median :  55.25   Median :-72.00   Mode  :character  
                    Mean   :1987   Mean   :   17084   Mean   :  70.54   Mean   :-48.05                     
                    3rd Qu.:2000   3rd Qu.:     176   3rd Qu.: 159.26   3rd Qu.: 15.37                     
                    Max.   :2012   Max.   :60000000   Max.   : 178.20   Max.   : 81.17                     
                    NA's   :142                                                                            
# Summary statistics restricted to rows with a mass below 1000 g.
summary(filter(data, mass_g < 1000))
    place                year          mass_g          longitude          latitude       fell_found       
 Length:30258       Min.   :-600   Min.   :  0.010   Min.   :-165.43   Min.   :-87.37   Length:30258      
 Class :character   1st Qu.:1984   1st Qu.:  4.503   1st Qu.:  35.67   1st Qu.:-79.68   Class :character  
 Mode  :character   Median :1990   Median : 18.080   Median :  56.70   Median :-72.77   Mode  :character  
                    Mean   :1990   Mean   : 91.143   Mean   :  78.29   Mean   :-55.43                     
                    3rd Qu.:2000   3rd Qu.: 84.823   3rd Qu.: 159.67   3rd Qu.:-71.50                     
                    Max.   :2012   Max.   :999.900   Max.   : 175.00   Max.   : 70.73                     
                    NA's   :111                                                                           
# Rows whose year is missing, kept for inspection.
missingYear <- filter(data, is.na(year))

missingYear

Look at the distributions of the numerical variables.

# Histogram of the year column over 90 bins, rendered as an interactive
# plotly widget. Rows with a missing year are dropped by stat_bin with a
# warning.
histogramYears <- data %>%
  ggplot(aes(x = year)) +
  geom_histogram(bins = 90) +
  theme_fivethirtyeight() +
  ggtitle('distribution of meteors per year')

ggplotly(histogramYears)
Removed 142 rows containing non-finite values (stat_bin).
# Normal Q-Q plots of year: all rows, then only years after 1800, then only
# years after 1960.
qqnorm(data[["year"]])

data1800y <- filter(data, year > 1800)

qqnorm(data1800y[["year"]])

data1960y <- filter(data, year > 1960)

qqnorm(data1960y[["year"]])

# One bar per year for the rows after 1960, shown as a plotly widget.
barplot1960 <- data1960y %>%
  ggplot(aes(x = year)) +
  theme_fivethirtyeight() +
  geom_bar()

ggplotly(barplot1960)
#histogram1980 <- ggplot(data1980y, aes(x = year)) +
#  geom_histogram(bins = 90) +
#  ggtitle('distribution of meteors per year') + 
#  theme_fivethirtyeight()

#ggplotly(histogram1980)
# Histogram (90 bins) and normal Q-Q plot of mass_g over the full data set.
histogramMass <- data %>%
  ggplot(aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  theme_fivethirtyeight() +
  ggtitle('distribution of mass_g of meteorites')

ggplotly(histogramMass)
qqnorm(data[["mass_g"]])

# Subset with mass below 1e4 g, then its Q-Q plot and histogram.
# NOTE(review): the object name says "1000g" but the cut-off used is 10,000 g.
data1000g <- filter(data, mass_g < 1e4)

qqnorm(data1000g[["mass_g"]])

histogram1000g <- data1000g %>%
  ggplot(aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  theme_fivethirtyeight() +
  ggtitle('distribution of mass_g of meteorites')

ggplotly(histogram1000g)
# Subset with mass below 176 g (the third quartile reported by summary(data)),
# then its Q-Q plot and histogram.
data176g <- filter(data, mass_g < 176)

qqnorm(data176g[["mass_g"]])

histogram176g <- data176g %>%
  ggplot(aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  theme_fivethirtyeight() +
  ggtitle('distribution of mass_g of meteorites')

ggplotly(histogram176g)
# Subset with mass below 25 g (the median reported by summary(data)),
# then its Q-Q plot and histogram.
data25g <- filter(data, mass_g < 25)

qqnorm(data25g[["mass_g"]])

histogram25g <- data25g %>%
  ggplot(aes(x = mass_g)) +
  geom_histogram(bins = 90) +
  theme_fivethirtyeight() +
  ggtitle('distribution of mass_g of meteorites')

ggplotly(histogram25g)
# Histogram of mass_g on a log10 x axis (70 bins).
# summary(data) reports a minimum mass of 0; log10(0) is -Inf, which
# scale_x_log10() would otherwise turn into non-finite values that are dropped
# with a warning. Keep only strictly positive masses so the exclusion is
# explicit and the plot builds without warnings.
histogramLogMass <- data %>%
  filter(mass_g > 0) %>%
  ggplot(aes(x = mass_g)) +
  geom_histogram(bins = 70) +
  ggtitle('distribution of mass_g of meteorites') +
  theme_fivethirtyeight() +
  scale_x_log10()

ggplotly(histogramLogMass)
# Bar chart counting rows per fell_found category; the x-axis label is blank.
data %>%
  ggplot(aes(x = fell_found)) +
  geom_bar() +
  xlab('') +
  theme_fivethirtyeight() +
  ggtitle('comparison of found and fell meteorites')

# Scatter plot of latitude against longitude with fully opaque points.
longOlat <- ggplot(data, aes(x = longitude, y = latitude)) +
  geom_point(alpha = 1)

longOlat

# Interactive version, currently disabled:
# ggplotly(longOlat)
# Same scatter plot with alpha 1/20, so that overplotted regions show up
# darker than isolated points.
longOlatAlp <- ggplot(data, aes(x = longitude, y = latitude)) +
  geom_point(alpha = 1 / 20)

longOlatAlp

# Interactive version, currently disabled:
# ggplotly(longOlatAlp)
# Hexagonal 2D binning of the locations with a viridis fill scale, shown as a
# plotly widget.
longOlatHex <- data %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_hex() +
  scale_fill_viridis()

ggplotly(longOlatHex)
# Scatter plot of mass against year over the full data set, opaque points.
massYear <- ggplot(data, aes(x = year, y = mass_g)) +
  geom_point(alpha = 1)

massYear

# Interactive version, currently disabled:
# ggplotly(massYear)
# First zoom of the mass-vs-year scatter: years after 1800 and masses below
# 1e6 g, drawn with alpha 1/10.
# This subset and plot previously shared the names data1980y1e4m and
# massYearSmall with the following chunk, which overwrites them with different
# thresholds (year > 1940, mass_g < 1e4). They now have their own names that
# match the filter; the values the later chunk stores under the old names are
# unaffected.
data1800y1e6m <- data %>%
  filter(year > 1800, mass_g < 1e6)

massYearLarge <- ggplot(data1800y1e6m) +
  geom_point(mapping = aes(x = year, y = mass_g), alpha = 1 / 10)

massYearLarge

# Interactive version, currently disabled:
# ggplotly(massYearLarge)
# Tighter zoom of the mass-vs-year scatter: years after 1940 and masses below
# 1e4 g, drawn with alpha 1/10.
# NOTE(review): the object name says "1980y" but the cut-off used is 1940.
data1980y1e4m <- filter(data, year > 1940, mass_g < 1e4)

massYearSmall <- data1980y1e4m %>%
  ggplot(aes(x = year, y = mass_g)) +
  geom_point(alpha = 1 / 10)

massYearSmall

# Interactive version, currently disabled:
# ggplotly(massYear)
# Correlation matrix of the numeric columns, drawn as a square-glyph corrplot.
# year contains missing values (142 NA's in summary(data)); with the default
# use = "everything", cor() returns NA for every pair involving year, which
# leaves those cells without a value. use = "complete.obs" computes the
# matrix on rows with no missing value in any numeric column.
# select(where(...)) replaces the superseded select_if().
data %>%
  select(where(is.numeric)) %>%
  cor(use = "complete.obs") %>%
  corrplot(method = "square")

# Print the rows recorded as 'Fell', then the rows from the year 2000 onwards.
filter(data, fell_found == 'Fell')
filter(data, year >= 2000)
# Hex-bin map restricted to longitude > 165 and latitude < -80.
data %>%
  filter(longitude > 165, latitude < -80) %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_hex() +
  scale_fill_viridis()

# Hex-bin map restricted to 0 < longitude < 50 and latitude < -50.
data %>%
  filter(longitude < 50, longitude > 0, latitude < -50) %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_hex() +
  scale_fill_viridis()

# Hex-bin map of a narrow window: 35.6 < longitude < 35.75 and
# -72 < latitude < -71 (all bounds exclusive).
data %>%
  filter(
    longitude < 35.75,
    longitude > 35.6,
    latitude < -71,
    latitude > -72
  ) %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_hex() +
  scale_fill_viridis()

LS0tDQp0aXRsZTogIkVEQSBtZXRlb3Igc3RyaWtlIGRhdGEiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpTb21lIGludHJvIHRleHQ/DQoNCg0KbG9hZCBwYWNrYWdlcw0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeShnZ3RoZW1lcykNCmxpYnJhcnkocGxvdGx5KQ0KbGlicmFyeShoZXhiaW4pDQpsaWJyYXJ5KHZpcmlkaXMpDQpsaWJyYXJ5KGNvcnJwbG90KQ0KYGBgDQoNCmFzc2lnbiBkYXRhIGZyb20gY3N2IHRvIGEgdmFyaWFibGUgKHRibCkgbmFtZWQgJ2RhdGEnDQoNCmBgYHtyfQ0KZGF0YSA8LSByZWFkX2NzdignLi4vZGF0YS9NZXRlb3JTdHJpa2VzRGF0YVNldDEuY3N2JykgJT4lDQogICAgICAgICAgY2xlYW5fbmFtZXMoKQ0KYGBgDQoNCmZpcnN0IGxvb2sgYXQgZGF0YSB3aXRoIGhlYWQsIHRhaWwgYW5kIHN1bW1hcnkNCg0KYGBge3J9DQpoZWFkKGRhdGEsIDUwKQ0KYGBgDQoNCg0KYGBge3J9DQp0YWlsKGRhdGEpDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5KGRhdGEpDQpgYGANCmBgYHtyfQ0KZGF0YSAlPiUNCiAgZmlsdGVyKG1hc3NfZyA8IDEwMDApICU+JQ0KICBzdW1tYXJ5KCkNCmBgYA0KDQoNCmBgYHtyfQ0KbWlzc2luZ1llYXIgPC0gZGF0YSAlPiUgDQogIGZpbHRlcihpcy5uYSh5ZWFyKSkNCg0KbWlzc2luZ1llYXINCmBgYA0KDQpsb29rIGF0IGRpc3RyaWJ1dGlvbiBvZiBudW1lcmljYWwgZGF0YQ0KDQpgYGB7cn0NCmhpc3RvZ3JhbVllYXJzIDwtIGdncGxvdChkYXRhLCBhZXMoeCA9IHllYXIpKSArDQogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA5MCkgKw0KICBnZ3RpdGxlKCdkaXN0cmlidXRpb24gb2YgbWV0ZW9ycyBwZXIgeWVhcicpICsgDQogIHRoZW1lX2ZpdmV0aGlydHllaWdodCgpDQoNCmdncGxvdGx5KGhpc3RvZ3JhbVllYXJzKQ0KYGBgDQoNCmBgYHtyfQ0KcXFub3JtKGRhdGEkeWVhcikNCmBgYA0KYGBge3J9DQpkYXRhMTgwMHkgPC0gZGF0YSAlPiUNCiAgZmlsdGVyKHllYXIgPiAxODAwKQ0KDQpxcW5vcm0oZGF0YTE4MDB5JHllYXIpDQpgYGANCg0KYGBge3J9DQpkYXRhMTk2MHkgPC0gZGF0YSAlPiUNCiAgZmlsdGVyKHllYXIgPiAxOTYwKQ0KDQpxcW5vcm0oZGF0YTE5NjB5JHllYXIpDQpgYGANCg0KYGBge3J9DQpiYXJwbG90MTk2MCA8LSBnZ3Bsb3QoZGF0YTE5NjB5LCBhZXMoeCA9IHllYXIpKSArDQogIGdlb21fYmFyKCkgKw0KICB0aGVtZV9maXZldGhpcnR5ZWlnaHQoKQ0KDQpnZ3Bsb3RseShiYXJwbG90MTk2MCkvDQpgYGANCg0KDQpgYGB7cn0NCiNoaXN0b2dyYW0xOTgwIDwtIGdncGxvdChkYXRhMTk4MHksIGFlcyh4ID0geWVhcikpICsNCiMgIGdlb21faGlzdG9ncmFtKGJpbnMgPSA5MCkgKw0KIyAgZ2d0aXRsZSgnZGlzdHJpYnV0aW9uIG9mIG1ldGVvcnMgcGVyIHllYXInKSArIA0KIyAgdGhlbWVfZml2ZXRoaXJ0eWVpZ2h0KCkNCg0KI2dncGxvdGx5KGhpc3RvZ3JhbTE5ODApDQpgYGANCg0KDQpgYGB7cn0NCmhpc3Rv
Z3JhbU1hc3MgPC0gZ2dwbG90KGRhdGEsIGFlcyh4ID0gbWFzc19nKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gOTApICsNCiAgZ2d0aXRsZSgnZGlzdHJpYnV0aW9uIG9mIG1hc3NfZyBvZiBtZXRlb3JpdGVzJykgKw0KICB0aGVtZV9maXZldGhpcnR5ZWlnaHQoKQ0KDQpnZ3Bsb3RseShoaXN0b2dyYW1NYXNzKQ0KYGBgDQoNCmBgYHtyfQ0KcXFub3JtKGRhdGEkbWFzc19nKQ0KYGBgDQoNCmBgYHtyfQ0KZGF0YTEwMDBnIDwtIGRhdGEgJT4lDQogIGZpbHRlcihtYXNzX2cgPCAxZSswNCkNCg0KcXFub3JtKGRhdGExMDAwZyRtYXNzX2cpDQpgYGANCg0KDQpgYGB7cn0NCmhpc3RvZ3JhbTEwMDBnIDwtIGdncGxvdChkYXRhMTAwMGcsIGFlcyh4ID0gbWFzc19nKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gOTApICsNCiAgZ2d0aXRsZSgnZGlzdHJpYnV0aW9uIG9mIG1hc3NfZyBvZiBtZXRlb3JpdGVzJykgKw0KICB0aGVtZV9maXZldGhpcnR5ZWlnaHQoKQ0KDQpnZ3Bsb3RseShoaXN0b2dyYW0xMDAwZykNCmBgYA0KDQpgYGB7cn0NCmRhdGExNzZnIDwtIGRhdGEgJT4lDQogIGZpbHRlcihtYXNzX2cgPCAxNzYpDQoNCnFxbm9ybShkYXRhMTc2ZyRtYXNzX2cpDQpgYGANCg0KDQpgYGB7cn0NCmhpc3RvZ3JhbTE3NmcgPC0gZ2dwbG90KGRhdGExNzZnLCBhZXMoeCA9IG1hc3NfZykpICsNCiAgZ2VvbV9oaXN0b2dyYW0oYmlucyA9IDkwKSArDQogIGdndGl0bGUoJ2Rpc3RyaWJ1dGlvbiBvZiBtYXNzX2cgb2YgbWV0ZW9yaXRlcycpICsNCiAgdGhlbWVfZml2ZXRoaXJ0eWVpZ2h0KCkNCg0KZ2dwbG90bHkoaGlzdG9ncmFtMTc2ZykNCmBgYA0KDQpgYGB7cn0NCmRhdGEyNWcgPC0gZGF0YSAlPiUNCiAgZmlsdGVyKG1hc3NfZyA8IDI1KQ0KDQpxcW5vcm0oZGF0YTI1ZyRtYXNzX2cpDQpgYGANCg0KDQpgYGB7cn0NCmhpc3RvZ3JhbTI1ZyA8LSBnZ3Bsb3QoZGF0YTI1ZywgYWVzKHggPSBtYXNzX2cpKSArDQogIGdlb21faGlzdG9ncmFtKGJpbnMgPSA5MCkgKw0KICBnZ3RpdGxlKCdkaXN0cmlidXRpb24gb2YgbWFzc19nIG9mIG1ldGVvcml0ZXMnKSArDQogIHRoZW1lX2ZpdmV0aGlydHllaWdodCgpDQoNCmdncGxvdGx5KGhpc3RvZ3JhbTI1ZykNCmBgYA0KDQpgYGB7cn0NCmhpc3RvZ3JhbUxvZ01hc3MgPC0gZ2dwbG90KGRhdGEsIGFlcyh4ID0gbWFzc19nKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gNzApICsNCiAgZ2d0aXRsZSgnZGlzdHJpYnV0aW9uIG9mIG1hc3NfZyBvZiBtZXRlb3JpdGVzJykgKw0KICB0aGVtZV9maXZldGhpcnR5ZWlnaHQoKSArIA0KICBzY2FsZV94X2xvZzEwKCkNCg0KZ2dwbG90bHkoaGlzdG9ncmFtTG9nTWFzcykNCmBgYA0KDQpgYGB7cn0NCmdncGxvdChkYXRhLCBhZXMoeCA9IGZlbGxfZm91bmQpKSArDQogIGdlb21fYmFyKCkgKw0KICBnZ3RpdGxlKCdjb21wYXJpc29uIG9mIGZvdW5kIGFuZCBmZWxsIG1ldGVvcml0ZXMnKSArDQogIHhsYWIoJycpICsgDQogIHRoZW1lX2ZpdmV0aGlydHll
aWdodCgpDQpgYGANCg0KYGBge3J9DQpsb25nT2xhdCA8LSBnZ3Bsb3QoZGF0YSkgKw0KICBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeSA9IGxhdGl0dWRlLCB4ID0gbG9uZ2l0dWRlKSwgYWxwaGEgPSAxIC8gMSkNCg0KbG9uZ09sYXQNCiNnZ3Bsb3RseShsb25nT2xhdCkNCmBgYA0KDQpgYGB7cn0NCmxvbmdPbGF0QWxwIDwtIGdncGxvdChkYXRhKSArDQogIGdlb21fcG9pbnQobWFwcGluZyA9IGFlcyh5ID0gbGF0aXR1ZGUsIHggPSBsb25naXR1ZGUpLCBhbHBoYSA9IDEgLyAyMCkNCg0KbG9uZ09sYXRBbHANCiNnZ3Bsb3RseShsb25nT2xhdEFscCkNCmBgYA0KDQoNCmBgYHtyfQ0KbG9uZ09sYXRIZXggPC0gZ2dwbG90KGRhdGEpICsNCiAgZ2VvbV9oZXgobWFwcGluZyA9IGFlcyh5ID0gbGF0aXR1ZGUsIHggPSBsb25naXR1ZGUpKSArDQogIHNjYWxlX2ZpbGxfdmlyaWRpcygpDQoNCmdncGxvdGx5KGxvbmdPbGF0SGV4KQ0KYGBgDQoNCmBgYHtyfQ0KbWFzc1llYXIgPC0gZ2dwbG90KGRhdGEpICsNCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSB5ZWFyLCB5ID0gbWFzc19nKSwgYWxwaGEgPSAxIC8gMSkNCg0KbWFzc1llYXINCiNnZ3Bsb3RseShtYXNzWWVhcikNCmBgYA0KDQpgYGB7cn0NCmRhdGExOTgweTFlNG0gPC0gZGF0YSAlPiUNCiAgZmlsdGVyKHllYXIgPiAxODAwKSAlPiUNCiAgZmlsdGVyKG1hc3NfZyA8IDFlNikNCg0KbWFzc1llYXJTbWFsbCA8LSBnZ3Bsb3QoZGF0YTE5ODB5MWU0bSkgKw0KICBnZW9tX3BvaW50KG1hcHBpbmcgPSBhZXMoeCA9IHllYXIsIHkgPSBtYXNzX2cpLCBhbHBoYSA9IDEgLyAxMCkNCg0KbWFzc1llYXJTbWFsbA0KI2dncGxvdGx5KG1hc3NZZWFyKQ0KYGBgDQoNCmBgYHtyfQ0KZGF0YTE5ODB5MWU0bSA8LSBkYXRhICU+JQ0KICBmaWx0ZXIoeWVhciA+IDE5NDApICU+JQ0KICBmaWx0ZXIobWFzc19nIDwgMWU0KQ0KDQptYXNzWWVhclNtYWxsIDwtIGdncGxvdChkYXRhMTk4MHkxZTRtKSArDQogIGdlb21fcG9pbnQobWFwcGluZyA9IGFlcyh4ID0geWVhciwgeSA9IG1hc3NfZyksIGFscGhhID0gMSAvIDEwKQ0KDQptYXNzWWVhclNtYWxsDQojZ2dwbG90bHkobWFzc1llYXIpDQpgYGANCg0KYGBge3J9DQpkYXRhICU+JQ0KICBzZWxlY3RfaWYoaXMubnVtZXJpYykgJT4lDQogIGNvcigpICU+JQ0KICBjb3JycGxvdChtZXRob2QgPSAic3F1YXJlIikNCg0KYGBgDQpgYGB7cn0NCmRhdGEgJT4lDQogIGZpbHRlcihmZWxsX2ZvdW5kID09ICdGZWxsJykNCmBgYA0KDQpgYGB7cn0NCmRhdGEgJT4lDQogIGZpbHRlcih5ZWFyID49IDIwMDApDQpgYGANCg0KYGBge3J9DQpkYXRhICU+JQ0KICBmaWx0ZXIobG9uZ2l0dWRlID4gMTY1KSAlPiUNCiAgZmlsdGVyKGxhdGl0dWRlIDwgLTgwKSAlPiUNCiAgZ2dwbG90KCkgKw0KICAgIGdlb21faGV4KG1hcHBpbmcgPSBhZXMoeSA9IGxhdGl0dWRlLCB4ID0gbG9uZ2l0dWRlKSkgKyANCiAgICBzY2FsZV9maWxsX3ZpcmlkaXMoKQ0KDQpgYGANCg0KYGBg
e3J9DQpkYXRhICU+JQ0KICBmaWx0ZXIobG9uZ2l0dWRlIDwgNTApICU+JQ0KICBmaWx0ZXIobG9uZ2l0dWRlID4gMCkgJT4lDQogIGZpbHRlcihsYXRpdHVkZSA8IC01MCkgJT4lDQogIGdncGxvdCgpICsNCiAgICBnZW9tX2hleChtYXBwaW5nID0gYWVzKHkgPSBsYXRpdHVkZSwgeCA9IGxvbmdpdHVkZSkpICsgDQogICAgc2NhbGVfZmlsbF92aXJpZGlzKCkNCg0KYGBgDQoNCmBgYHtyfQ0KZGF0YSAlPiUNCiAgZmlsdGVyKGxvbmdpdHVkZSA8IDM1Ljc1KSAlPiUNCiAgZmlsdGVyKGxvbmdpdHVkZSA+IDM1LjYpICU+JQ0KICBmaWx0ZXIobGF0aXR1ZGUgPCAtNzEpICU+JQ0KICBmaWx0ZXIobGF0aXR1ZGUgPiAtNzIpICU+JQ0KICBnZ3Bsb3QoKSArDQogICAgZ2VvbV9oZXgobWFwcGluZyA9IGFlcyh5ID0gbGF0aXR1ZGUsIHggPSBsb25naXR1ZGUpKSArIA0KICAgIHNjYWxlX2ZpbGxfdmlyaWRpcygpDQoNCmBgYA==